knitr::opts_chunk$set(dev = "svg")

library(data.table)
library(dplyr)
library(table1)
library(ggpubr)
library(ggthemr)
library(ggsci)
library(cowplot)
library(ggplot2)
library(geepack)
library(sjPlot)
#library(purrr)
#library(knitr)

source("utils/toolbox.R")
source("utils/timepoints_function.R")
source("utils/custom_lollipop.R")

data_file_path <- "/Users/irenaeuschan/Documents/Irenaeus/CDK46_CH/data/"

# CH Calls
sclc_df <- fread(paste0(data_file_path, "g1_sclc_df.minimum.csv"))                       # Small Cell Lung Cancer
tnbc_df <- fread(paste0(data_file_path, "g1_tnbc_df.minimum.csv"))                       # Triple Negative Breast Cancer
crc_df <- fread(paste0(data_file_path, "g1_crc_df.minimum.csv")) 
untreated_df <- fread(paste0(data_file_path, "untreated_df.minimum.csv"))             # Untreated

g1_df <- rbind(
    sclc_df %>% mutate(Cohort = "SCLC"), 
    tnbc_df %>% mutate(Cohort = "TNBC"), 
    crc_df %>% mutate(Cohort = "CRC"), 
    untreated_df %>% mutate(Cohort = "Untreated")
)

sclc_schematic <- paste0(data_file_path, "SCLC_G1.png")
tnbc_schematic <- paste0(data_file_path, "TNBC_G1.png")
crc_schematic <- paste0(data_file_path, "CRC_G1.png")

# Timepoints Dataframe
sclc_tp <- fread(paste0(data_file_path, "sclc_timepoints.minimum.csv"))                  # Small Cell Lung Cancer
tnbc_tp <- fread(paste0(data_file_path, "tnbc_timepoints.minimum.csv"))                  # Triple Negative Breast Cancer
crc_tp <- fread(paste0(data_file_path, "crc_timepoints.minimum.csv"))                    # Colorectal Cancer

g1_tp <- rbind(
    sclc_tp %>% mutate(Cohort = if_else(Trilaciclib == "Untreated", "Untreated", "SCLC")), 
    tnbc_tp %>% mutate(Cohort = "TNBC"), 
    crc_tp %>% mutate(Cohort = "CRC")
)

# We mainly need these basic information: Age, Gender, Sex, Race, Ethnicity, Smoking
# For Age, preAge is fine because that's baseline
#sclc_demographics <- fread(paste0(data_file_path, "sclc_demographics.csv"))             # Small Cell Lung Cancer
#tnbc_demographics <- fread(paste0(data_file_path, "tnbc_demographics.csv"))             # Triple Negative Breast Cancer
#crc_demographics <- fread(paste0(data_file_path, "crc_demographics.csv"))               # Colorectal Cancer

1 Plotting Variables

panel_theme_basic <- theme_bw() + theme(
  panel.border = element_blank(),
  legend.title = element_blank(),
  legend.key.size = unit(5, "mm"),
  legend.text = element_text(size = 16),
  legend.position = "top",
  legend.direction = "horizontal",
  plot.subtitle = element_text(hjust = 0.5, size = 20),
  plot.title = element_text(face = "bold", hjust = 0, vjust = -2, size = 20),
  panel.grid.major = element_blank(),
  strip.background = element_blank(),
  strip.text = element_text(size = 18),
  axis.text.y = element_text(size = 18),
  axis.text.x = element_text(size = 18),
  axis.title = element_text(size = 20),
  axis.line = element_line(colour = "black"),
  plot.margin = margin(1, 1, 0, 1, "cm")
)

default_sig <- function(y_position, xmin, xmax, model_pvalue, model_stars, tip_length = 0.03, size = 1, textsize = 8, show_values = TRUE, hide_ns = TRUE) {
  if (show_values){
    list(y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = paste0("p = ", format(model_pvalue, digits = 5), model_stars), tip_length = tip_length, size = size, textsize = textsize)
  } else {
    if (model_stars == "") { model_stars <- "ns"}
    if (hide_ns & model_stars == "ns") { 
      list(y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = "", tip_length = 0, size = 0, textsize = 0)
    } else {
      list(y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = model_stars, tip_length = tip_length, size = size, textsize = textsize)
    }
  }
}

default_sig_data <- function(data, y_position, xmin, xmax, model_pvalue, model_stars, tip_length = 0.03, size = 1, textsize = 8, show_values = TRUE, hide_ns = TRUE) {
  if (show_values) {
    list(data = data, y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = paste0("p = ", format(model_pvalue, digits = 5), model_stars), tip_length = tip_length, size = size, textsize = textsize)
  } else {
    if (model_stars == "") { model_stars <- "ns"}
    if (hide_ns & model_stars == "ns") { 
      list(data = data, y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = "", tip_length = 0, size = 0, textsize = 0)
    } else {
      list(data = data, y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = model_stars, tip_length = tip_length, size = size, textsize = textsize)
    }
  }
}

add_significance_annotation <- function(p, label, pos, start, end, size_text = 10, size_line = 1) {
    p +
      annotate("text", x = (pos + 0.1), y = (start + end)/2, label = label, size = size_text) +
      annotate("segment", x = pos, xend = pos, y = start, yend = end, size = size_line) +
      annotate("segment", x = pos - 0.05, xend = pos + 0.008, y = start, yend = start, size = size_line) +
      annotate("segment", x = pos - 0.05, xend = pos + 0.008, y = end, yend = end, size = size_line)
}

signif.num <- function(x, ns = FALSE) {
  if (ns) {
    symbols = c("***", "**", "*", "ns")
  } else {
    symbols = c("***", "**", "*", "")
  }
  
  symnum(unlist(x), corr = FALSE, na = FALSE, legend = T,
         cutpoints = c(0, 0.001, 0.01, 0.05, 1), 
         symbols = symbols)
}

2 Default Variables

DDR_genes = c("TP53", "PPM1D", "CHEK2")
treatment_colors = c('Untreated' = '#C0C0C0', 'Placebo' = '#7AD3F7', 'Trilaciclib' = '#F37B79')

3 Reference Variables

g1_tp$Trilaciclib <- factor(g1_tp$Trilaciclib, levels = c("Untreated", "Trilaciclib", "Placebo"))
g1_tp$Gene_Class <- factor(g1_tp$Gene_Class, levels = c("DDR", "DTA"))
g1_tp$Gene <- factor(g1_tp$Gene, levels = c("TP53", "PPM1D", "CHEK2", "DNMT3A", "TET2", "ASXL1", "SF3B1", "SRSF2", "JAK2"))
g1_tp$Cohort <- factor(g1_tp$Cohort, levels = c("Untreated", "SCLC", "TNBC", "CRC"))

4 Figure 1B - Gene Distribution Plot

genes_bins <- g1_df %>% 
    filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
    filter(putative_driver == 1) %>%
    group_by(Gene, whichdraw, Trilaciclib) %>% 
    summarise(mutation_count = n()) %>% 
    mutate(Gene = ifelse(Gene == "", "No Mutation", Gene))
## `summarise()` has grouped output by 'Gene', 'whichdraw'. You can override using
## the `.groups` argument.
genes_bins <- genes_bins %>%
    left_join( 
        g1_df %>% 
        filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
        group_by(whichdraw, Trilaciclib) %>% 
        summarise(total_mutations = n())
    ) %>%
    mutate(prop = mutation_count / total_mutations)
## `summarise()` has grouped output by 'whichdraw'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(whichdraw, Trilaciclib)`
fig1b <- genes_bins %>%
    filter(whichdraw == "PreTx" | whichdraw == "PostTx") %>%
    mutate(
        Trilaciclib = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib")),
        Gene = factor(Gene, levels = c("DNMT3A", "TET2", "ASXL1", "TP53", "PPM1D", "CHEK2", "JAK2", "SRSF2", "SF3B1")),
        whichdraw = factor(whichdraw, levels = c("PreTx", "PostTx"))
    ) %>%
    ggplot(
        aes(
            x = Gene,
            y = prop,
            fill = Trilaciclib
        )
    ) + 
    geom_bar(stat = "identity", position = position_dodge()) +
    labs(
        y = "Porportion of Mutations",
        x = '',
        fill = "Trilaciclib"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        #legend.position = c(0.8, 0.6),
        #legend.direction = "vertical",
    ) +
    scale_fill_manual(values = treatment_colors) +
    scale_y_continuous(labels = scales::percent, limits = c(NA, 0.7), expand = c(0, 0)) +
    facet_wrap(~whichdraw, ncol = 1)

5 Figure 1C - Small Cell Lung Cancer Growth Rate Plot

df_to_plot <- g1_tp %>% filter(Cohort == "SCLC" | Cohort == "Untreated")

model_growth_rate_class_geeglm <- rbind(
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
## Warning in diff(as.numeric(id)): NAs introduced by coercion

## Warning in diff(as.numeric(id)): NAs introduced by coercion
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
model_growth_rate_class <- model_growth_rate_class %>% filter(term != "Age")

counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(n = n(), .groups = "drop")

fig1c <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene_Class,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot() +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5, 0.75, 1, model_growth_rate_class$p.value[1], model_growth_rate_class$p.stars[1], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5.5, 0.75, 1.25, model_growth_rate_class$p.value[2], model_growth_rate_class$p.stars[2], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 6.5, 1, 1.25, model_growth_rate_class$p.value[3], model_growth_rate_class$p.stars[3], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5, 1.75, 2, model_growth_rate_class$p.value[4], model_growth_rate_class$p.stars[4], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5.5, 1.75, 2.25, model_growth_rate_class$p.value[5], model_growth_rate_class$p.stars[5], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 6.5, 2, 2.25, model_growth_rate_class$p.value[6], model_growth_rate_class$p.stars[6], show_values = FALSE))

6 Figure 1 - Small Cell Lung Cancer CH Landscape

fig1a <- ggdraw() + draw_image(sclc_schematic)
cowplot::plot_grid(
    cowplot::plot_grid(
        fig1a, 
        fig1b,
        ncol = 2,
        nrow = 1,
        labels = c("A", "B"),
        label_size = 30
    ), 
    fig1c,
    ncol = 1,
    labels = c("", "C"),
    rel_heights = c(0.8, 1),
    label_size = 30
)
## Warning: Removed 21 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Removed 21 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Removed 21 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Warning: Removed 7 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 7 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 7 rows containing non-finite outside the scale range (`stat_signif()`).

7 Figure 2C - Triple Negative Breast Cancer Growth Rate Plot

df_to_plot <- g1_tp %>% filter(Cohort == "TNBC" | Cohort == "Untreated")

model_growth_rate_class_geeglm <- rbind(
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
## Warning in diff(as.numeric(id)): NAs introduced by coercion

## Warning in diff(as.numeric(id)): NAs introduced by coercion
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
model_growth_rate_class <- model_growth_rate_class %>% filter(term != "Age")

counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(n = n(), .groups = "drop")

fig2c <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene_Class,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot() +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5, 0.75, 1, model_growth_rate_class$p.value[1], model_growth_rate_class$p.stars[1], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5.5, 0.75, 1.25, model_growth_rate_class$p.value[2], model_growth_rate_class$p.stars[2], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 6.5, 1, 1.25, model_growth_rate_class$p.value[3], model_growth_rate_class$p.stars[3], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5, 1.75, 2, model_growth_rate_class$p.value[4], model_growth_rate_class$p.stars[4], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5.5, 1.75, 2.25, model_growth_rate_class$p.value[5], model_growth_rate_class$p.stars[5], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 6.5, 2, 2.25, model_growth_rate_class$p.value[6], model_growth_rate_class$p.stars[6], show_values = FALSE))

8 Figure 2D - Colorectal Cancer Growth Rate Plot

df_to_plot <- g1_tp %>% filter(Cohort == "CRC" | Cohort == "Untreated") %>%
    filter(compare_group == "C1D1vsMaintenance" | compare_group == "PrevsPost")

model_growth_rate_class_geeglm <- rbind(
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            #mutate(
            #    preAge = as.numeric(preAge)
            #) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                #preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            #mutate(
            #    preAge = as.numeric(preAge)
            #) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                #preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
## Warning in diff(as.numeric(id)): NAs introduced by coercion

## Warning in diff(as.numeric(id)): NAs introduced by coercion

## Warning in diff(as.numeric(id)): NAs introduced by coercion

## Warning in diff(as.numeric(id)): NAs introduced by coercion
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            # mutate(
            #     preAge = as.numeric(preAge)
            # ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                #preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            # mutate(
            #     preAge = as.numeric(preAge)
            # ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                #preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
#model_growth_rate_class <- model_growth_rate_class %>% filter(term != "Age")

counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(n = n(), .groups = "drop")

fig2d <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene_Class,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot() +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5, 0.75, 1, model_growth_rate_class$p.value[1], model_growth_rate_class$p.stars[1], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5.5, 0.75, 1.25, model_growth_rate_class$p.value[2], model_growth_rate_class$p.stars[2], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 6.5, 1, 1.25, model_growth_rate_class$p.value[3], model_growth_rate_class$p.stars[3], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5, 1.75, 2, model_growth_rate_class$p.value[4], model_growth_rate_class$p.stars[4], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5.5, 1.75, 2.25, model_growth_rate_class$p.value[5], model_growth_rate_class$p.stars[5], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 6.5, 2, 2.25, model_growth_rate_class$p.value[6], model_growth_rate_class$p.stars[6], show_values = FALSE))

9 Figure 2 - Additional Samples with TNBC and CRC

fig2a <- ggdraw() + draw_image(tnbc_schematic)
fig2b <- ggdraw() + draw_image(crc_schematic)
cowplot::plot_grid(
    fig2a, fig2b,
    fig2c, fig2d,
    ncol = 2,
    nrow = 2,
    labels = c("A", "B", "C", "D"),
    label_size = 30
)

10 Figure 3 - Genes

df_to_plot <- g1_tp %>% 
    filter(ifelse(
            Cohort == "CRC", 
            ifelse(compare_group == "C1D1vsMaintenance", TRUE, FALSE),
            TRUE
        )
    )

model_growth_rate_gene <- map_dfr(c("TP53", "PPM1D", "CHEK2", "DNMT3A", "TET2", "ASXL1"), function(gene) {
    print(gene)
    model_growth_rate_gene <- rbind(
        glm(
            formula = growth_rate ~ Trilaciclib + Cohort,
            data = df_to_plot %>%
                filter(Gene == gene) %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(
            term = ifelse(term == "TrilaciclibTrilaciclib", "UntreatedVSTrilaciclib", "UntreatedVSPlacebo"),
            gene = gene
        ),
        glm(
            formula = growth_rate ~ Trilaciclib  + Cohort,
            data = df_to_plot %>%
                filter(Gene == gene) %>%
                filter(Trilaciclib != "Untreated") %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(
            term = "TrilaciclibVSPlacebo",
            gene = gene
        )
    )
    model_growth_rate_gene
})
## [1] "TP53"
## Model matrix is rank deficient. Parameters `CohortCRC` were not
##   estimable.
## [1] "PPM1D"
## Model matrix is rank deficient. Parameters `CohortCRC` were not
##   estimable.
## [1] "CHEK2"
## Model matrix is rank deficient. Parameters `CohortCRC` were not
##   estimable.
## [1] "DNMT3A"
## Model matrix is rank deficient. Parameters `CohortCRC` were not
##   estimable.
## [1] "TET2"
## Model matrix is rank deficient. Parameters `CohortCRC` were not
##   estimable.
## [1] "ASXL1"
## Model matrix is rank deficient. Parameters `CohortCRC` were not
##   estimable.
model_growth_rate_gene
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax gene
UntreatedVSTrilaciclib 0.0013174 0.0017653 0.95 -0.0021425 0.0047774 0.7462765 170 0.4555004 0.00 pos 4 3.825 4.175 TP53
UntreatedVSPlacebo 0.0042362 0.0016973 0.95 0.0009096 0.0075628 2.4958712 170 0.0125648 * 0.00 * pos 3 2.825 3.175 TP53
UntreatedVSPlacebo 0.0026365 0.0015267 0.95 -0.0003557 0.0056288 1.7269597 170 0.0841749 0.00 pos 2 1.825 2.175 TP53
UntreatedVSPlacebo 0.0020654 0.0012544 0.95 -0.0003932 0.0045240 1.6465408 170 0.0996525 0.00 pos 1 0.825 1.175 TP53
TrilaciclibVSPlacebo 0.0029188 0.0012248 0.95 0.0005183 0.0053193 2.3831364 148 0.0171658 * 0.00 * pos 3 2.825 3.175 TP53
TrilaciclibVSPlacebo -0.0005711 0.0016638 0.95 -0.0038322 0.0026899 -0.3432579 148 0.7314045 -0.00 neg 2 1.825 2.175 TP53
TrilaciclibVSPlacebo -0.0026365 0.0015997 0.95 -0.0057719 0.0004988 -1.6481668 148 0.0993184 -0.00 neg 1 0.825 1.175 TP53
UntreatedVSTrilaciclib 0.0079517 0.0022275 0.95 0.0035858 0.0123175 3.5697207 227 0.0003574 *** 0.01 *** pos 4 3.825 4.175 PPM1D
UntreatedVSPlacebo 0.0135336 0.0022310 0.95 0.0091608 0.0179063 6.0660743 227 0.0000000 *** 0.01 *** pos 3 2.825 3.175 PPM1D
UntreatedVSPlacebo 0.0048641 0.0014038 0.95 0.0021128 0.0076154 3.4650485 227 0.0005301 *** 0.00 *** pos 2 1.825 2.175 PPM1D
UntreatedVSPlacebo 0.0025903 0.0014331 0.95 -0.0002186 0.0053991 1.8074218 227 0.0706965 0.00 pos 1 0.825 1.175 PPM1D
TrilaciclibVSPlacebo 0.0055819 0.0012373 0.95 0.0031568 0.0080070 4.5113409 209 0.0000064 *** 0.01 *** pos 3 2.825 3.175 PPM1D
TrilaciclibVSPlacebo -0.0022738 0.0016356 0.95 -0.0054796 0.0009319 -1.3902205 209 0.1644619 -0.00 neg 2 1.825 2.175 PPM1D
TrilaciclibVSPlacebo -0.0048641 0.0014582 0.95 -0.0077222 -0.0020060 -3.3356459 209 0.0008510 *** -0.00 *** neg 1 0.825 1.175 PPM1D
UntreatedVSTrilaciclib 0.0045921 0.0022895 0.95 0.0001048 0.0090794 2.0057413 85 0.0448839 * 0.00 * pos 4 3.825 4.175 CHEK2
UntreatedVSPlacebo 0.0069775 0.0021279 0.95 0.0028070 0.0111481 3.2790944 85 0.0010414 ** 0.01 ** pos 3 2.825 3.175 CHEK2
UntreatedVSPlacebo 0.0003993 0.0018572 0.95 -0.0032408 0.0040393 0.2149838 85 0.8297799 0.00 pos 2 1.825 2.175 CHEK2
UntreatedVSPlacebo 0.0005250 0.0018274 0.95 -0.0030567 0.0041066 0.2872828 85 0.7738958 0.00 pos 1 0.825 1.175 CHEK2
TrilaciclibVSPlacebo 0.0023854 0.0016630 0.95 -0.0008740 0.0056449 1.4343923 72 0.1514604 0.00 pos 3 2.825 3.175 CHEK2
TrilaciclibVSPlacebo 0.0001257 0.0021904 0.95 -0.0041674 0.0044189 0.0573930 72 0.9542321 0.00 pos 2 1.825 2.175 CHEK2
TrilaciclibVSPlacebo -0.0003993 0.0019826 0.95 -0.0042850 0.0034865 -0.2013899 72 0.8403937 -0.00 neg 1 0.825 1.175 CHEK2
UntreatedVSTrilaciclib -0.0005591 0.0006287 0.95 -0.0017913 0.0006732 -0.8892467 633 0.3738705 -0.00 neg 4 3.825 4.175 DNMT3A
UntreatedVSPlacebo 0.0004088 0.0006077 0.95 -0.0007822 0.0015999 0.6727396 633 0.5011130 0.00 pos 3 2.825 3.175 DNMT3A
UntreatedVSPlacebo -0.0001770 0.0006884 0.95 -0.0015262 0.0011723 -0.2570936 633 0.7971065 -0.00 neg 2 1.825 2.175 DNMT3A
UntreatedVSPlacebo 0.0002387 0.0007281 0.95 -0.0011883 0.0016657 0.3278453 633 0.7430287 0.00 pos 1 0.825 1.175 DNMT3A
TrilaciclibVSPlacebo 0.0009679 0.0005519 0.95 -0.0001139 0.0020497 1.7535998 460 0.0794991 0.00 pos 3 2.825 3.175 DNMT3A
TrilaciclibVSPlacebo 0.0004157 0.0008903 0.95 -0.0013293 0.0021606 0.4668966 460 0.6405739 0.00 pos 2 1.825 2.175 DNMT3A
TrilaciclibVSPlacebo 0.0001770 0.0006958 0.95 -0.0011867 0.0015406 0.2543790 460 0.7992028 0.00 pos 1 0.825 1.175 DNMT3A
UntreatedVSTrilaciclib 0.0001802 0.0013355 0.95 -0.0024375 0.0027978 0.1348941 252 0.8926956 0.00 pos 4 3.825 4.175 TET2
UntreatedVSPlacebo 0.0006199 0.0011867 0.95 -0.0017060 0.0029459 0.5224016 252 0.6013907 0.00 pos 3 2.825 3.175 TET2
UntreatedVSPlacebo -0.0028461 0.0019006 0.95 -0.0065712 0.0008791 -1.4974492 252 0.1342764 -0.00 neg 2 1.825 2.175 TET2
UntreatedVSPlacebo -0.0005009 0.0013645 0.95 -0.0031753 0.0021735 -0.3671012 252 0.7135435 -0.00 neg 1 0.825 1.175 TET2
TrilaciclibVSPlacebo 0.0004398 0.0014435 0.95 -0.0023893 0.0032689 0.3046779 158 0.7606115 0.00 pos 3 2.825 3.175 TET2
TrilaciclibVSPlacebo 0.0023452 0.0024174 0.95 -0.0023928 0.0070831 0.9701325 158 0.3319805 0.00 pos 2 1.825 2.175 TET2
TrilaciclibVSPlacebo 0.0028461 0.0022160 0.95 -0.0014972 0.0071893 1.2843362 158 0.1990244 0.00 pos 1 0.825 1.175 TET2
UntreatedVSTrilaciclib 0.0001074 0.0048407 0.95 -0.0093803 0.0095950 0.0221770 62 0.9823068 0.00 pos 4 3.825 4.175 ASXL1
UntreatedVSPlacebo 0.0005634 0.0039097 0.95 -0.0070996 0.0082263 0.1440929 62 0.8854271 0.00 pos 3 2.825 3.175 ASXL1
UntreatedVSPlacebo -0.0001390 0.0054832 0.95 -0.0108859 0.0106079 -0.0253561 62 0.9797710 -0.00 neg 2 1.825 2.175 ASXL1
UntreatedVSPlacebo -0.0124023 0.0049731 0.95 -0.0221494 -0.0026551 -2.4938490 62 0.0126366 * -0.01 * neg 1 0.825 1.175 ASXL1
TrilaciclibVSPlacebo 0.0004560 0.0050496 0.95 -0.0094410 0.0103530 0.0903070 44 0.9280433 0.00 pos 3 2.825 3.175 ASXL1
TrilaciclibVSPlacebo -0.0122632 0.0069414 0.95 -0.0258681 0.0013417 -1.7666787 44 0.0772820 -0.01 neg 2 1.825 2.175 ASXL1
TrilaciclibVSPlacebo 0.0001390 0.0064950 0.95 -0.0125909 0.0128689 0.0214062 44 0.9829216 0.00 pos 1 0.825 1.175 ASXL1
df_to_plot %>% filter(growth_rate != "Inf" & growth_rate != "-Inf") %>% dplyr::select(Gene, Trilaciclib) %>% table()
##         Trilaciclib
## Gene     Untreated Trilaciclib Placebo
##   TP53          23          82      70
##   PPM1D         19         108     105
##   CHEK2         14          36      40
##   DNMT3A       174         230     234
##   TET2          95          59     103
##   ASXL1         19          23      25
##   SF3B1          5           5       9
##   SRSF2          3           1       5
##   JAK2           2           3       4
counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(Trilaciclib, Gene) %>%
    summarise(n = n(), .groups = "drop")

fig3 <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot() +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 5, 0.75, 1, model_growth_rate_gene$p.value[1], model_growth_rate_gene$p.stars[1], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 6, 0.75, 1.25, model_growth_rate_gene$p.value[2], model_growth_rate_gene$p.stars[2], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 7.1, 1, 1.25, model_growth_rate_gene$p.value[3], model_growth_rate_gene$p.stars[3], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D"), 5, 1.75, 2, model_growth_rate_gene$p.value[4], model_growth_rate_gene$p.stars[4], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D"), 6, 1.75, 2.25, model_growth_rate_gene$p.value[5], model_growth_rate_gene$p.stars[5], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D"), 7.1, 2, 2.25, model_growth_rate_gene$p.value[6], model_growth_rate_gene$p.stars[6], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2"), 5, 2.75, 3, model_growth_rate_gene$p.value[7], model_growth_rate_gene$p.stars[7], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2"), 6, 2.75, 3.25, model_growth_rate_gene$p.value[8], model_growth_rate_gene$p.stars[8], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2"), 7.1, 3, 3.25, model_growth_rate_gene$p.value[9], model_growth_rate_gene$p.stars[9], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "DNMT3A"), 5, 3.75, 4, model_growth_rate_gene$p.value[10], model_growth_rate_gene$p.stars[10], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "DNMT3A"), 6, 3.75, 4.25, model_growth_rate_gene$p.value[11], model_growth_rate_gene$p.stars[11], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "DNMT3A"), 7, 4, 4.25, model_growth_rate_gene$p.value[12], model_growth_rate_gene$p.stars[12], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TET2"), 5, 4.75, 5, model_growth_rate_gene$p.value[13], model_growth_rate_gene$p.stars[13], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TET2"), 6, 4.75, 5.25, model_growth_rate_gene$p.value[14], model_growth_rate_gene$p.stars[14], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TET2"), 7, 5, 5.25, model_growth_rate_gene$p.value[15], model_growth_rate_gene$p.stars[15], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "ASXL1"), 5, 5.75, 6, model_growth_rate_gene$p.value[16], model_growth_rate_gene$p.stars[16], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "ASXL1"), 6, 5.75, 6.25, model_growth_rate_gene$p.value[17], model_growth_rate_gene$p.stars[17], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "ASXL1"), 7, 6, 6.25, model_growth_rate_gene$p.value[18], model_growth_rate_gene$p.stars[18], show_values = FALSE, hide_ns = FALSE))
colors.mutation_type <- brewer.pal(8,"Dark2")[1:8]
names(colors.mutation_type) <- c("frameshift_variant", "inframe_insertion", "splice_region_variant", "inframe_deletion", 'start_lost','stop_gained','missense_variant', "synonymous_variant")

# Initialize an empty list to store the processed data
data_list <- list()

# Process the data and store it in data_list
data_list <- map(c("TP53", "PPM1D", "CHEK2", "DNMT3A", "TET2", "ASXL1"), function(gene) {
    prot.info = get_protein_info(gene)
    to_lollipop <- g1_tp %>%
        left_join(
            g1_df %>%
                dplyr::select(DeID, key, gene_aachange, VariantClass)
        ) %>%
        filter(Cohort != "Untreated") %>%
        filter(gene_aachange != "" & gene_aachange != "DNMT3A_" & gene_aachange != "TET2_" & gene_aachange != "ASXL1_NA") %>%
        mutate(
            ID = DeID,
            PROTEIN_CHANGE = gene_aachange,
            PROTEIN_POS = as.numeric(sub(".*?([0-9]+).*", "\\1", sub(".*_", "", gene_aachange))),
            hit = case_when(
                Trilaciclib == "Placebo" ~ 1,
                Trilaciclib == "Trilaciclib" ~ 2
            ),
            gene = Gene,
            mutation_type = VariantClass
        )
    return(list(gene = gene, data = to_lollipop, prot_info = prot.info))
})
## Joining with `by = join_by(DeID, key)`
## Joining with `by = join_by(DeID, key)`
## Joining with `by = join_by(DeID, key)`
## Joining with `by = join_by(DeID, key)`
## Joining with `by = join_by(DeID, key)`
## Joining with `by = join_by(DeID, key)`
# Initialize an empty list to store the ggplot objects
plot_list <- list()

# Create the plots and store them in plot_list
for (item in data_list) {
    gene <- item$gene
    to_lollipop <- item$data
    prot.info <- item$prot_info
    
    loliplot <- my_lollipop(dat.genetics.unique=to_lollipop, 
                            current.gene=gene, 
                            protein_domain=prot.info$protein_domain, 
                            protein_length=prot.info$protein_length, 
                            cutoff.hotspot = 20,
                            colors.mutation_type=colors.mutation_type)
    plot_list[[gene]] <- loliplot
}
## `summarise()` has grouped output by 'hit', 'PROTEIN_CHANGE'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'hit', 'PROTEIN_CHANGE'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'hit', 'PROTEIN_CHANGE'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'hit', 'PROTEIN_CHANGE'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'hit', 'PROTEIN_CHANGE'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'hit', 'PROTEIN_CHANGE'. You can override
## using the `.groups` argument.
# Combine the plots using plot_grid
combined_plot <- cowplot::plot_grid(plotlist = plot_list, ncol = 3)
combined_plot